import requests
import re

import middleware


class HttpException(Exception):
    """Error type for HTTP failures.

    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
    ``except Exception`` handlers catch it; ``BaseException`` is meant for
    interpreter-exit signals such as ``KeyboardInterrupt`` and ``SystemExit``.
    """


class Request:
    """A URL to fetch, paired with the callback that handles its response.

    Two requests compare equal when their URLs are equal; the callback is
    ignored for comparison and hashing.
    """

    def __init__(self, url, parse_callback=None):
        """Store the target *url* and the optional *parse_callback*.

        Args:
            url: Address to fetch.
            parse_callback: Callable invoked with the response; may be None.
        """
        self.url = url
        self.parse_cb = parse_callback

    def __eq__(self, other):
        """Return True when *other* is a Request for the same URL."""
        if not isinstance(other, Request):
            # Let Python fall back to its default handling instead of
            # raising AttributeError on objects without a ``url``.
            return NotImplemented
        return self.url == other.url

    def __hash__(self):
        """Hash on the URL so equal requests hash alike.

        Defining ``__eq__`` alone would make instances unhashable. Do not
        mutate ``url`` while the request is stored in a set or dict.
        """
        return hash(self.url)

    def __repr__(self):
        return "{}(url={!r})".format(type(self).__name__, self.url)


class BaseSpider:
    """Base class for spiders that fetch URLs and hand responses to ``parse``.

    Subclasses provide seed addresses in ``urls`` and implement ``parse``.
    ``parse`` may return an iterable mixing ``Request`` objects (queued for
    fetching) and other objects (passed through ``middleware.middlewares``).
    """

    # Class-level default only; get_requests() gives every instance its own
    # list on first use so spiders never share one queue.
    requests = []
    # Seed URLs turned into Request objects by get_requests().
    urls = []
    # Cached "<schema>://<domain>" string, filled in lazily by get_domain().
    domain = None
    # Raw string: same pattern as before, without invalid-escape warnings.
    http_regex = r"((?P<schema>(http(s)?))://)?(?P<domain>[\w\.-]+)(?P<path>([\/\w\%\.]+))?(\?(?P<arguments>[^\s]+))?"

    def get_requests(self):
        """Return this spider's request queue, seeding it from ``urls``.

        Each seed URL is queued at most once, so calling this method
        repeatedly does not duplicate entries.

        Returns:
            The per-instance list of queued ``Request`` objects.
        """
        if "requests" not in self.__dict__:
            # Copy the class-level default so mutations stay on this instance.
            self.requests = list(self.requests)

        queued_urls = {queued.url for queued in self.requests}
        for seed in self.urls:
            if seed in queued_urls:
                continue
            self.requests.append(Request(seed, parse_callback=self.parse))
            queued_urls.add(seed)
        return self.requests

    def get_domain(self):
        """Return ``"<schema>://<domain>"``, deriving it from the first request.

        The value is cached on ``self.domain`` after the first computation.

        Raises:
            ValueError: If there is no request to derive the domain from, or
                the first request's URL has no http/https schema.
        """
        if self.domain is None:
            queue = self.get_requests()
            if not queue:
                raise ValueError("Invalid url was given.")
            # Match against the URL string, not the Request object itself.
            parts = self.parse_url(queue[0].url)
            if parts["schema"] is None:
                raise ValueError("Invalid url was given.")
            self.domain = parts["schema"] + "://" + parts["domain"]

        return self.domain

    def parse_url(self, url):
        """Split *url* with ``http_regex`` and return its named groups.

        Returns:
            A dict with keys ``schema``, ``domain``, ``path`` and
            ``arguments``; parts absent from *url* map to None.

        Raises:
            ValueError: If *url* does not match ``http_regex`` at all.
        """
        m = re.match(self.http_regex, url)
        if m is None:
            raise ValueError("Invalid url was given.")
        return m.groupdict()

    def parse(self, response):
        """Handle a fetched *response*; subclasses must override this."""
        raise NotImplementedError

    def crawl(self):
        """Fetch every queued request and dispatch the parsed results.

        Non-2xx responses are reported with ``print`` and skipped. Requests
        returned by a callback are appended to the queue unless their URL was
        already queued; any other returned object is passed to each
        middleware's ``process_model`` until one raises
        ``middleware.DropModel``.
        """
        queue = self.get_requests()
        # URLs already queued, so pages that link to each other cannot make
        # the crawl loop forever.
        seen_urls = {queued.url for queued in queue}

        # The queue is appended to while it is iterated on purpose: newly
        # discovered requests are visited later in this same loop.
        for req in queue:
            assert isinstance(req, Request)
            response = requests.get(req.url)
            # Any 2xx status counts as success, not only 200 and 299.
            if not 200 <= response.status_code <= 299:
                # reason may be None, so coerce before concatenating.
                print("ERROR: " + str(response.reason))
                continue

            callback = req.parse_cb
            if callback is None:
                # Requests built without a callback use the spider's parse().
                callback = self.parse
            results = callback(response)
            if not results:
                continue

            for item in results:
                if isinstance(item, Request):
                    if item.url not in seen_urls:
                        seen_urls.add(item.url)
                        queue.append(item)
                else:
                    for mw in middleware.middlewares:
                        try:
                            mw.process_model(item)
                        except middleware.DropModel:
                            break

